In [1]:
import json
from bs4 import BeautifulSoup
import requests
import time
import csv
from functools import reduce
from selenium import webdriver
import datetime 

In [2]:
# Import housing listing scraped from Step1.
# Blank lines are skipped so they never turn into an empty listing URL.
with open('./data/listingURL.csv') as listingFile:
    listings = [line.strip() for line in listingFile if line.strip()]

# De-duplicate while keeping the file order. `list(set(...))` would also remove
# duplicates, but the iteration order of a set of strings can change between
# kernel sessions (string hashing is randomised per process), and later cells
# pick batches out of `finalListings` by index, so the order must be stable.
finalListings = list(dict.fromkeys(listings))

In [3]:
#get details for individual listings
def _scrape_field(extract, missing_message, width=1):
    """Run ``extract()``, echo what it found, and fall back to ``None`` on failure.

    Parameters
    ----------
    extract : callable
        Zero-argument callable returning the scraped value, or a tuple of
        ``width`` values when ``width > 1``.
    missing_message : str
        Message printed when ``extract`` raises.
    width : int, optional
        Number of values ``extract`` returns (default 1).

    Returns
    -------
    The value (or tuple of values) returned by ``extract``; ``None`` (or a
    tuple of ``width`` ``None`` values) when ``extract`` raised.
    """
    try:
        found = extract()
    except Exception:
        # A missing page element shows up as AttributeError / IndexError /
        # KeyError / TypeError / ValueError. `Exception` (rather than a bare
        # `except:`) keeps Ctrl-C able to interrupt a long scraping run.
        print(missing_message)
        return None if width == 1 else (None,) * width
    for value in ((found,) if width == 1 else found):
        print(value)
    return found


def _join_nonempty(parts, separator):
    """Join ``parts`` with ``separator``; raise ``ValueError`` if there are none."""
    parts = list(parts)
    if not parts:
        raise ValueError("nothing to join")
    return separator.join(parts)


def _house_capacity(house):
    """Return the first four ``div._czm8crp`` strings under ``house`` as a 4-tuple."""
    cells = house.select('div._czm8crp')
    # Explicit indexing so that fewer than four cells raises IndexError.
    return cells[0].string, cells[1].string, cells[2].string, cells[3].string


def _calendar_months(calendar):
    """Return the month captions found in ``calendar`` as one comma-separated string."""
    captions = [block.strong.string for block in calendar.find_all('div', class_='_gucugi')]
    return _join_nonempty(captions, ',')


def _available_dates_per_month(calendar):
    """Count, per calendar table, the day cells whose aria-label does not start with 'Not'.

    Returns the counts as one comma-separated string, one count per table.
    """
    counts = []
    for table in calendar.find_all('table', class_='_p5jgym'):
        open_days = sum(
            1
            for row in table.tbody.find_all('tr')
            for cell in row.find_all('td', role='button')
            if not cell['aria-label'].startswith('Not')
        )
        counts.append(str(open_days))
    return _join_nonempty(counts, ',')


def _review_summary(soup):
    """Return ``(reviewTotal, reviewStar)`` read from the ``div#reviews`` section."""
    reviews = soup.find('div', id='reviews')
    total = reviews.find('span', class_='_s1tlw0m').string.split(' ')[0]
    stars = reviews.find('div', itemprop='ratingValue')['content']
    return total, stars


def _neighborhood_summary(soup):
    """Return the neighbourhood description spans joined with newlines."""
    section = soup.find('div', id='neighborhood')
    container = section.find('p', class_='_6z3til').find('span', class_='_czm8crp')
    return _join_nonempty([span.string for span in container.find_all('span')], '\n')


def _amenities_text(driver):
    """Click the button inside ``div#amenities`` and return the text of ``div._ag795hh``."""
    # Imported here so this block is self-contained. `find_element(By..., ...)`
    # is available in both Selenium 3 and 4, unlike the `find_element_by_*`
    # shortcuts, which newer Selenium releases no longer provide.
    from selenium.webdriver.common.by import By

    button = driver.find_element(By.CSS_SELECTOR, 'div#amenities button')
    webdriver.ActionChains(driver).move_to_element(button).click(button).perform()
    time.sleep(1.5)  # give the amenities panel time to open
    return driver.find_element(By.CSS_SELECTOR, 'div._ag795hh').text


def getSingleListingDetails(listingURL, results):
    """Scrape one listing page and append its details to ``results``.

    Opens ``https://<listingURL>?source_impression_id=...`` in a fresh Chrome
    session, parses the rendered page with BeautifulSoup, and extracts each
    field independently: a field that cannot be found is stored as ``None``
    and a "No ..." message is printed, without affecting the other fields.

    Parameters
    ----------
    listingURL : str
        Listing address without the scheme, e.g. ``www.airbnb.com/rooms/4956314``.
    results : list
        Accumulator; one dict per successfully processed listing is appended.

    Returns
    -------
    list
        The same ``results`` list. If navigation or parsing fails as a whole,
        the error is printed and ``results`` is returned unchanged.

    Notes
    -----
    The browser is always closed, including when the page fails to load, so a
    long batch does not leave Chrome processes behind.
    """
    listingURL = 'https://' + listingURL + '?source_impression_id=p3_1565916454_fq3L4OEYsMClvED9'
    # NOTE(review): passing the driver path positionally is the Selenium 3
    # calling style; confirm against the installed Selenium version.
    driver = webdriver.Chrome('./chromedriver')

    try:
        driver.get(listingURL)
        # Wait *before* taking the snapshot so that content rendered after the
        # initial load is present in the HTML handed to BeautifulSoup.
        time.sleep(5)
        soup = BeautifulSoup(driver.page_source, "lxml")

        listingNm = _scrape_field(
            lambda: soup.find('div', itemprop='name').select('h1 span')[0].string,
            "No listing name")
        listingLoc = _scrape_field(
            lambda: soup.find('div', class_='_czm8crp').string,
            "No listing location")

        listingHouse = soup.find('div', class_='_n5lh69r')  ##subgroup
        listingHouseType = _scrape_field(
            lambda: listingHouse.find('div', class_='_1p3joamp').string,
            "No listing housing type")
        numGuests, numBedrooms, numBeds, numBaths = _scrape_field(
            lambda: _house_capacity(listingHouse),
            "No listing housing type details", width=4)

        calendar = soup.find('div', attrs={"aria-label": "Calendar"})  ##subgroup
        monthsString = _scrape_field(
            lambda: _calendar_months(calendar), "No Calendar Month")
        availableDts = _scrape_field(
            lambda: _available_dates_per_month(calendar), "No Available Dates")

        reviewTotal, reviewStar = _scrape_field(
            lambda: _review_summary(soup), "No Reviews", width=2)
        neighborhoodSummary = _scrape_field(
            lambda: _neighborhood_summary(soup), "No Neigohorhood Summary")
        price = _scrape_field(
            lambda: soup.find('span', class_='_doc79r').string, "No Price")
        amendities = _scrape_field(
            lambda: _amenities_text(driver), "No Amendities")

        # Key order defines the CSV column order used when results are written.
        results.append({
            'id': listingURL.split('/')[-1].split('?')[0],
            'listingNm': listingNm,
            'listingLoc': listingLoc,
            'listingHouseType': listingHouseType,
            'numGuests': numGuests,
            'numBedrooms': numBedrooms,
            'numBeds': numBeds,
            'numBaths': numBaths,
            'monthsString': monthsString,
            'availableDts': availableDts,
            'reviewTotal': reviewTotal,
            'reviewStar': reviewStar,
            'neighborhoodSummary': neighborhoodSummary,
            'price': price,
            'amendities': amendities,
        })
    except Exception as e:
        print(e)
    finally:
        # Runs on success, on failure and on interruption alike.
        driver.quit()

    return results
                  

In [4]:
def write_listing_details(results, path='./data/seattle_individual_listing_details.csv'):
    """Append scraped listing dicts to a pipe-delimited CSV file.

    Parameters
    ----------
    results : list of dict
        Rows to write. The keys of the first dict define the column order.
    path : str, optional
        Destination file; defaults to the notebook's listing-details CSV.

    Notes
    -----
    * The file is opened in append mode so successive batches accumulate.
      The header row is written only when the file is new or empty, so
      repeated calls do not scatter header rows through the data.
    * An empty ``results`` list is a no-op instead of an ``IndexError``.
    * ``newline=''`` is what the csv module requires for files it writes;
      without it, fields containing line breaks (for example multi-line
      amenities text) may not round-trip correctly.
    """
    import os  # local import: the notebook's import cell does not bring in os

    if not results:
        print("No listing details to write")
        return

    fields = list(results[0].keys())
    needs_header = not os.path.exists(path) or os.path.getsize(path) == 0

    with open(path, 'a', newline='', encoding='utf-8') as f:
        dw = csv.DictWriter(f, fieldnames=fields, delimiter='|')
        if needs_header:
            dw.writeheader()
        dw.writerows(results)

In [7]:
# def getAllListingDetails(listingURLs):
#     results = []
#     print(len(listingURLs))
#     for listing in listingURLs:
#         ##get details
#         results = getSingleListingDetails(listing, results)
#         print(len(results))

      
#     ##write results
#     write_listing_details(results) 
    
# getAllListingDetails(finalListings)
# # getAllListingDetails(['www.airbnb.com/rooms/4956314'])

In [5]:
# Half-open index range [BATCH_START, BATCH_END) of finalListings to scrape in this run.
BATCH_START = 2464
BATCH_END = 2711

results = []

try:
    # Slicing clamps to the end of the list, so a batch that runs past the
    # last listing simply stops there instead of raising IndexError.
    for listing in finalListings[BATCH_START:BATCH_END]:
        ##get details
        results = getSingleListingDetails(listing, results)
finally:
    # Runs on interruption or error too, so listings scraped so far are kept.
    print(len(results))

    ##write results
    if results:  # nothing scraped -> nothing to write
        write_listing_details(results)
    
# getAllListingDetails(finalListings)
# getAllListingDetails(['www.airbnb.com/rooms/4956314'])

Mountain View Room
Seattle
Private room in house
2 guests
1 bedroom
1 bed
1.5 shared baths
July 2019,August 2019,September 2019,October 2019
0,7,1,0
51
4.5
University Park is a more quiet residential area only with many owner occupied homes, far enough away from student mayhem.
$65
Amenities
Basic
Wifi
Continuous access in the listing
Dryer
In the building, free or for a fee
Washer
In the building, free or for a fee
Essentials
Towels, bed sheets, soap, and toilet paper
Heating
Central heating or a heater in the listing
Hot water
Facilities
Free street parking
Dining
Kitchen
Space where guests can cook their own meals
Coffee maker
Cooking basics
Pots and pans, oil, salt and pepper
Dishes and silverware
Dishwasher
Microwave
Refrigerator
Oven
Stove
Guest access
Lockbox
Logistics
Luggage dropoff allowed
For guests' convenience when they have early arrival or late departure
Bed and bath
Shampoo
Lock on bedroom door
Private room can be locked for safety and privacy
Outdoor
BBQ grill
Garden o

In [9]:
# Number of listing-detail dicts currently held in `results`.
# NOTE(review): this cell's execution count (9) is out of sequence with the
# cells around it, so the value displayed below may reflect a different kernel
# state than a top-to-bottom run would produce; re-run to confirm.
len(results)

2712

In [6]:
# Number of listing-detail dicts currently held in `results`.
len(results)

1146

In [7]:
# NOTE(review): write_listing_details opens the CSV in append mode, and the
# scraping cell above already calls it on `results`. Running this cell after
# that one appends the same rows to the file a second time.
write_listing_details(results)